#####
## September 2019
## Run this code file first in R
## This generates the scaled estimates of attentiveness
#####

RNGkind("L'Ecuyer-CMRG")
set.seed(43)

library(pscl)
library(foreign)
library(tidyverse)
library(psych) 
library(ltm)

multiplot <- function(..., plotlist=NULL, file, cols=2, layout=NULL) {
  library(grid)
  
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  
  numPlots = length(plots)
  
  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  
  if (numPlots==1) {
    print(plots[[1]])
    
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    
    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

#Website screener: scr_web_w1; scr_web_w2

#Biggest problem facing the country screener: scr_problem_w1; scr_problem_w2

#World War II grid screener: scr_grid_ww2_w1; scr_grid_ww2_w2

#Choose middle option grid screener: scr_grid_middle_w1; scr_grid_middle_w2

#Obama grid screener: scr_grid_obama_w1; scr_grid_obama_w2

#Two is greater than one screener : scr_grid_number_w1; scr_grid_number_w2

#Color screener: scr_color_w1; scr_color_w2

#Newspaper section screener: scr_section_w1; scr_newspaper_w2

data_screeners<-read.dta("Survey_Data.dta")
data_screeners$respondent<-1:2952
write.dta(data_screeners,file="Survey_Data.dta")
##### 
## Run 2 and 3-parameter IRT models to estimate attentiveness
#####

#####
## model using all screeners
#####

data2<-dplyr::select(data_screeners, scr_web_w1, scr_problem_w1, scr_grid_ww2_w1, scr_grid_middle_w1, scr_grid_obama_w1, scr_grid_number_w1, scr_color_w1, scr_section_w1)
data2<-rollcall(data2,
yea=1, nay=0, missing=c(NA), notInLegis=9,
legis.names=NULL, vote.names=NULL,
legis.data=NULL, vote.data=NULL,
desc=NULL, source=NULL)
model_screeners<-ideal(data2, store.item=T,normalize=T,maxiter = 20000, thin = 10)

## 2 parameter irt model
data2<-dplyr::select(data_screeners, scr_web_w1, scr_problem_w1, scr_grid_ww2_w1, scr_grid_middle_w1, scr_grid_obama_w1, scr_grid_number_w1, scr_color_w1, scr_section_w1)
model_screeners_irt<-ltm(data2~z1)
estimates_screeners_irt<-factor.scores(model_screeners_irt,resp.patterns = data2) 
temp<-estimates_screeners_irt[1]$score.dat
estimates_screeners_irt2<-dplyr::select(temp,z1)
estimates_screeners_irt2<-dplyr::rename(estimates_screeners_irt2,irt_z1=z1)
estimates_screeners_irt2$respondent<-1:2952

parameters<-as.data.frame(model_screeners[6])
vote1_disc<-median(parameters$beta.Vote.1.Discrimination.D1)
vote2_disc<-median(parameters$beta.Vote.2.Discrimination.D1)
vote3_disc<-median(parameters$beta.Vote.3.Discrimination.D1)
vote4_disc<-median(parameters$beta.Vote.4.Discrimination.D1)
vote5_disc<-median(parameters$beta.Vote.5.Discrimination.D1)
vote6_disc<-median(parameters$beta.Vote.6.Discrimination.D1)
vote7_disc<-median(parameters$beta.Vote.7.Discrimination.D1)
vote8_disc<-median(parameters$beta.Vote.8.Discrimination.D1)
disc<-as.data.frame(c(vote1_disc, vote2_disc,vote3_disc,vote4_disc,vote5_disc,vote6_disc,vote7_disc,vote8_disc) )

vote1_diff<-median(parameters$beta.Vote.1.Difficulty)
vote2_diff<-median(parameters$beta.Vote.2.Difficulty)
vote3_diff<-median(parameters$beta.Vote.3.Difficulty)
vote4_diff<-median(parameters$beta.Vote.4.Difficulty)
vote5_diff<-median(parameters$beta.Vote.5.Difficulty)
vote6_diff<-median(parameters$beta.Vote.6.Difficulty)
vote7_diff<-median(parameters$beta.Vote.7.Difficulty)
vote8_diff<-median(parameters$beta.Vote.8.Difficulty)
diff<-as.data.frame(c(vote1_diff, vote2_diff,vote3_diff,vote4_diff,vote5_diff,vote6_diff,vote7_diff,vote8_diff) )
  
predictions<-matrix(data=NA, nrow=401, ncol=9)
predictions<-as.data.frame(predictions)
colnames(predictions)<-c("attentiveness","Stand-alone: Websites"," Stand-alone: MIP","Grid: WWI/WWII","Grid: Check Middle Option","Grid: Obama first president","Grid: Two greater than one"," Stand-alone: Favorite Color"," Stand-alone: Section of Newspaper")
att<-seq(-4,4,.02)
predictions[,1]<-att
for (i in 1:401){
	for (j in 1:8){
		predictions[i,(j+1)]<-pnorm((disc[j,]*att[i])-diff[j,])
	}
}

plot(predictions)

predictions2 <- predictions %>% gather(Item, Probability_Correct, -attentiveness)
predictions2<-dplyr::rename(predictions2, Attentiveness=attentiveness)

pdf("Figures/8items_predictions.pdf", height=5)
ggplot(predictions2, aes(Attentiveness, Probability_Correct, colour = Item)) +
  geom_line()+theme_bw()+ylab("Pr(Correct)")
dev.off()

tif<-matrix(data=NA, nrow=401, ncol=9)
tif<-as.data.frame(tif)
colnames(tif)<-c("attentiveness","Stand-alone: Websites","Stand-alone: MIP","Grid: WWI/WWII","Grid: Check Middle Option","Grid: Obama first president","Grid: Two greater than one","Stand-alone: Favorite Color","Stand-alone: Section of Newspaper")

att<-seq(-4,4,.02)
tif[,1]<-att
for (i in 1:401){
	for (j in 1:8){
		tif[i,(j+1)]<-disc[j,]^2*pnorm((disc[j,]*att[i])-diff[j,])*(1-pnorm((disc[j,]*att[i])-diff[j,]))
	}
}

tif2 <- tif %>% gather(Item, Information, -attentiveness)
tif2<-dplyr::rename(tif2, Attentiveness=attentiveness)
tif2$Item <- as.character(tif2$Item)
tif2$Item <- factor(tif2$Item, levels=sort(unique(tif2$Item)))
tif2$line="dotted"
 
pdf("Figures/8items_information.pdf", height=5)
ggplot(tif2, aes(Attentiveness, Information, colour = Item)) +
  geom_line(aes(linetype=Item))+theme_bw()+
    scale_linetype_manual(values=c("dashed","dashed","dashed","dashed","solid","solid","solid","solid"))+
geom_vline(xintercept=0, linetype="dashed")
 dev.off() 
  
plot(model_screeners_irt,
legend = TRUE,
     cx = "bottomright",
     lwd = 3,
     cex.main = 1.5,
     cex.lab = 1.3,
cex = 0.8)

plot(model_screeners_irt,
legend = TRUE,
     cx = "topright",
     cex = 0.8,
     type = "IIC",
     annot = FALSE,
     lwd = 3,
     cex.main = 1.5,
     cex.lab = 1.3)

plot(model_screeners_irt, type="IIC",
     items = 0,
     lwd = 3,
     cex.main = 1.5,
     cex.lab = 1.3)
     
## 3 parameter irt model
data2<-dplyr::select(data_screeners, scr_web_w1, scr_problem_w1, scr_grid_ww2_w1, scr_grid_middle_w1, scr_grid_obama_w1, scr_grid_number_w1, scr_color_w1, scr_section_w1)
model_screeners_guessing<-tpm(data2)
estimates_screeners_guessing<-factor.scores(model_screeners_guessing,resp.patterns = data2) 
temp<-estimates_screeners_guessing[1]$score.dat
estimates_screeners_guessing2<-dplyr::select(temp,z1)
estimates_screeners_guessing2<-dplyr::rename(estimates_screeners_guessing2, guessing_z1=z1)
estimates_screeners_guessing2$respondent<-1:2952

plot(model_screeners_guessing,
legend = TRUE,
     cx = "bottomright",
     lwd = 3,
     cex.main = 1.5,
     cex.lab = 1.3,
cex = 0.8)


plot(model_screeners_guessing,
legend = TRUE,
     cx = "topright",
     cex = 0.8,
     type = "IIC",
     annot = FALSE,
     lwd = 3,
     cex.main = 1.5,
     cex.lab = 1.3)

plot(model_screeners_guessing, type="IIC",
     items = 0,
     lwd = 3,
     cex.main = 1.5,
     cex.lab = 1.3)

## Factor model
data2<-dplyr::select(data_screeners, scr_web_w1, scr_problem_w1, scr_grid_ww2_w1, scr_grid_middle_w1, scr_grid_obama_w1, scr_grid_number_w1, scr_color_w1, scr_section_w1)
mydata<-data2
mydata<- mydata[complete.cases(mydata), ]
fit <- factanal(mydata[,1:8], 1, rotation="varimax", scores="regression")
print(fit, digits=2, cutoff=.3, sort=TRUE)
#mydata<-cbind(mydata, fit$scores)

fit <- princomp(mydata[,3:6], cor=TRUE)
summary(fit) # print variance accounted for 
loadings(fit) # pc loadings 
factorscores<-fit$scores[,1]
factorscores<-as.data.frame(factorscores)
colnames(factorscores)[1]<-"factor_scores"
factorscores$respondent<-rownames(factorscores)

## compare 2 and 3 parameter models
data2<-rollcall(data2,
yea=1, nay=0, missing=c(NA), notInLegis=9,
legis.names=NULL, vote.names=NULL,
legis.data=NULL, vote.data=NULL,
desc=NULL, source=NULL)
estimates_screeners<-cbind(as.data.frame(data_screeners$respondent),as.data.frame(summary(model_screeners)[2]),as.data.frame(data_screeners[,692]))
colnames(estimates_screeners)<-c("respondent","ideal_mixed",  "scr_pass")
estimates_screeners_guessing2<-dplyr::select(estimates_screeners_guessing2, -respondent)
estimates_screeners<-cbind(estimates_screeners, estimates_screeners_guessing2)
estimates_screeners_irt2<-dplyr::select(estimates_screeners_irt2, -respondent)
estimates_screeners<-cbind(estimates_screeners, estimates_screeners_irt2)
estimates_screeners<-merge(estimates_screeners, factorscores, by.x="respondent", by.y="respondent",all.x=T)
cor(estimates_screeners$factor_scores, estimates_screeners$ideal, use="complete.obs")
cor(estimates_screeners$guessing_z1, estimates_screeners$ideal, use="complete.obs")
cor(estimates_screeners$irt_z1, estimates_screeners$ideal, use="complete.obs")
cor(estimates_screeners$irt_z1, estimates_screeners$guessing_z1, use="complete.obs")
anova(model_screeners_irt,model_screeners_guessing)

#####
## model using 4 grid screeners
#####

data2<-dplyr::select(data_screeners, scr_grid_ww2_w1, scr_grid_middle_w1, scr_grid_obama_w1, scr_grid_number_w1)
data2<-rollcall(data2,
yea=1, nay=0, missing=c(NA), notInLegis=9,
legis.names=NULL, vote.names=NULL,
legis.data=NULL, vote.data=NULL,
desc=NULL, source=NULL)
model_screeners_grid<-ideal(data2, store.item=T, normalize=T,maxiter = 20000, thin = 10)

## Mean passage rates for grid screeners
mean(
mean(data_screeners$scr_grid_ww2_w1, na.rm=T),
mean(data_screeners$scr_grid_middle_w1, na.rm=T),
mean(data_screeners$scr_grid_obama_w1, na.rm=T),
mean(data_screeners$scr_grid_number_w1, na.rm=T)
)

## 2 parameter irt model
data2<-dplyr::select(data_screeners, scr_grid_ww2_w1, scr_grid_middle_w1, scr_grid_obama_w1, scr_grid_number_w1)
model_screeners_irt<-ltm(data2~z1)
estimates_screeners_irt<-factor.scores(model_screeners_irt,resp.patterns = data2) 
temp<-estimates_screeners_irt[1]$score.dat
estimates_screeners_irt2<-dplyr::select(temp,z1)
estimates_screeners_irt2<-dplyr::rename(estimates_screeners_irt2,irt_z1=z1)
estimates_screeners_irt2$id<-1:2952

## 3 parameter irt model
data2<-dplyr::select(data_screeners, scr_grid_ww2_w1, scr_grid_middle_w1, scr_grid_obama_w1, scr_grid_number_w1)
model_screeners_guessing<-tpm(data2)
estimates_screeners_guessing<-factor.scores(model_screeners_guessing,resp.patterns = data2) 
temp<-estimates_screeners_guessing[1]$score.dat
estimates_screeners_guessing2<-dplyr::select(temp,z1)
estimates_screeners_guessing2<-dplyr::rename(estimates_screeners_guessing2, guessing_z1=z1)
estimates_screeners_guessing2$respondent<-1:2952

## compare 2 and 3 parameter models
data2<-rollcall(data2,
yea=1, nay=0, missing=c(NA), notInLegis=9,
legis.names=NULL, vote.names=NULL,
legis.data=NULL, vote.data=NULL,
desc=NULL, source=NULL)
estimates_screeners<-cbind(as.data.frame(data_screeners$respondent),as.data.frame(summary(model_screeners)[2]),as.data.frame(data_screeners[,692]))
colnames(estimates_screeners)<-c("respondent","ideal_mixed",  "scr_pass")
estimates_screeners<-cbind(estimates_screeners, estimates_screeners_guessing2)
estimates_screeners<-cbind(estimates_screeners, estimates_screeners_irt2)
cor(estimates_screeners$guessing_z1, estimates_screeners$ideal, use="complete.obs")
cor(estimates_screeners$irt_z1, estimates_screeners$ideal, use="complete.obs")
cor(estimates_screeners$irt_z1, estimates_screeners$guessing_z1, use="complete.obs")
anova(model_screeners_irt,model_screeners_guessing)

#####
## model using 4 non-grid screeners
#####

data2<-dplyr::select(data_screeners, scr_web_w1, scr_problem_w1, scr_color_w1, scr_section_w1)
data2<-rollcall(data2,
yea=1, nay=0, missing=c(NA), notInLegis=9,
legis.names=NULL, vote.names=NULL,
legis.data=NULL, vote.data=NULL,
desc=NULL, source=NULL)
model_screeners_nongrid<-ideal(data2, store.item=T, normalize=T,maxiter = 20000, thin = 10)

## Mean passage rates for non-grid screeners
mean(
mean(data_screeners$scr_web_w1, na.rm=T),
mean(data_screeners$scr_problem_w1, na.rm=T),
mean(data_screeners$scr_color_w1, na.rm=T),
mean(data_screeners$scr_section_w1, na.rm=T))

## 2 parameter irt model
data2<-dplyr::select(data_screeners, scr_web_w1, scr_problem_w1, scr_color_w1, scr_section_w1)
model_screeners_irt<-ltm(data2~z1)
estimates_screeners_irt<-factor.scores(model_screeners_irt,resp.patterns = data2) 
temp<-estimates_screeners_irt[1]$score.dat
estimates_screeners_irt2<-dplyr::select(temp,z1)
estimates_screeners_irt2<-dplyr::rename(estimates_screeners_irt2,irt_z1=z1)
estimates_screeners_irt2$respondent<-1:2952

## 3 parameter irt model
data2<-dplyr::select(data_screeners, scr_web_w1, scr_problem_w1, scr_color_w1, scr_section_w1)
model_screeners_guessing<-tpm(data2)
estimates_screeners_guessing<-factor.scores(model_screeners_guessing,resp.patterns = data2) 
temp<-estimates_screeners_guessing[1]$score.dat
estimates_screeners_guessing2<-dplyr::select(temp,z1)
estimates_screeners_guessing2<-dplyr::rename(estimates_screeners_guessing2, guessing_z1=z1)
estimates_screeners_guessing2$respondent<-1:2952

## compare 2 and 3 parameter models
data2<-rollcall(data2,
yea=1, nay=0, missing=c(NA), notInLegis=9,
legis.names=NULL, vote.names=NULL,
legis.data=NULL, vote.data=NULL,
desc=NULL, source=NULL)
estimates_screeners<-cbind(as.data.frame(data_screeners$respondent),as.data.frame(summary(model_screeners)[2]),as.data.frame(data_screeners[,692]))
colnames(estimates_screeners)<-c("respondent","ideal_mixed",  "scr_pass")
estimates_screeners<-cbind(estimates_screeners, estimates_screeners_guessing2)
estimates_screeners<-cbind(estimates_screeners, estimates_screeners_irt2)
cor(estimates_screeners$guessing_z1, estimates_screeners$ideal, use="complete.obs")
cor(estimates_screeners$irt_z1, estimates_screeners$ideal, use="complete.obs")
cor(estimates_screeners$irt_z1, estimates_screeners$guessing_z1, use="complete.obs")
anova(model_screeners_irt,model_screeners_guessing)


#####
## model using 4 mixed grid and non-grid with highest Disc.
#####
data2<-dplyr::select(data_screeners, scr_web_w1, scr_section_w1, scr_grid_middle_w1, scr_grid_obama_w1)
data2<-rollcall(data2,
yea=1, nay=0, missing=c(NA), notInLegis=9,
legis.names=NULL, vote.names=NULL,
legis.data=NULL, vote.data=NULL,
desc=NULL, source=NULL)
model_screeners_mixed<-ideal(data2, store.item=T, normalize=T,maxiter = 20000, thin = 10)

## 2 parameter irt model
data2<-dplyr::select(data_screeners, scr_web_w1, scr_section_w1, scr_grid_middle_w1, scr_grid_obama_w1)
model_screeners_irt<-ltm(data2~z1)
estimates_screeners_irt<-factor.scores(model_screeners_irt,resp.patterns = data2) 
temp<-estimates_screeners_irt[1]$score.dat
estimates_screeners_irt2<-dplyr::select(temp,z1)
estimates_screeners_irt2<-dplyr::rename(estimates_screeners_irt2,irt_z1=z1)
estimates_screeners_irt2$respondent<-1:2952

## 3 parameter irt model
data2<-dplyr::select(data_screeners, scr_web_w1, scr_section_w1, scr_grid_middle_w1, scr_grid_obama_w1)
model_screeners_guessing<-tpm(data2)
estimates_screeners_guessing<-factor.scores(model_screeners_guessing,resp.patterns = data2) 
temp<-estimates_screeners_guessing[1]$score.dat
estimates_screeners_guessing2<-dplyr::select(temp,z1)
estimates_screeners_guessing2<-dplyr::rename(estimates_screeners_guessing2, guessing_z1=z1)
estimates_screeners_guessing2$respondent<-1:2952

## compare 2 and 3 parameter models
data2<-rollcall(data2,
yea=1, nay=0, missing=c(NA), notInLegis=9,
legis.names=NULL, vote.names=NULL,
legis.data=NULL, vote.data=NULL,
desc=NULL, source=NULL)
estimates_screeners<-cbind(as.data.frame(data_screeners$respondent),as.data.frame(summary(model_screeners)[2]),as.data.frame(data_screeners[,692]))
colnames(estimates_screeners)<-c("respondent","ideal_mixed",  "scr_pass")
estimates_screeners<-cbind(estimates_screeners, estimates_screeners_guessing2)
estimates_screeners<-cbind(estimates_screeners, estimates_screeners_irt2)
cor(estimates_screeners$guessing_z1, estimates_screeners$ideal, use="complete.obs")
cor(estimates_screeners$irt_z1, estimates_screeners$ideal, use="complete.obs")
cor(estimates_screeners$irt_z1, estimates_screeners$guessing_z1, use="complete.obs")
anova(model_screeners_irt,model_screeners_guessing)

save.image(file="screening_190924.RData")
